Implement ImapFolderAgent.

Akinori MUSHA vor 10 Jahren
Ursprung
Commit
2c2ebf533c

+ 429 - 0
app/models/agents/imap_folder_agent.rb

@@ -0,0 +1,429 @@
1
+require 'delegate'
2
+require 'net/imap'
3
+require 'mail'
4
+
5
+module Agents
6
+  class ImapFolderAgent < Agent
7
+    cannot_receive_events!
8
+
9
+    default_schedule "every_30m"
10
+
11
+    description <<-MD
12
+
13
+      The ImapFolderAgent checks an IMAP server in specified folders
14
+      and creates Events based on new unread mails.
15
+
16
+      Specify an IMAP server to connect with `host`, and set `ssl` to
17
+      true if the server supports IMAP over SSL.  Specify `port` if
18
+      you need to connect to a port other than standard (143 or 993
19
+      depending on the `ssl` value).
20
+
21
+      Specify login credentials in `username` and `password`.
22
+
23
+      List the names of folders to check in `folders`.
24
+
25
+      To narrow mails by conditions, build a `conditions` hash with
26
+      the following keys:
27
+
28
+      - "subject"
29
+      - "body"
30
+
31
+          Specify a string of the regular expression that is matched
32
+          against the decoded subject/body of each mail.
33
+
34
+          Use the `(?i)` directive for case insensitive search.
35
+
36
+          When a mail has multiple non-attachment text parts, they are
37
+          prioritized according to the `mime_types` option (which see
38
+          below) and the first part that matches a "body" pattern, if
39
+          specified, will be chosen as the "body" value in a created
40
+          event.
41
+
42
+          Named captures will appear in the "matches" hash in a created
43
+          event.
44
+
45
+      - "from", "to", "cc"
46
+
47
+          Specify a shell glob pattern string that is matched against
48
+          mail addresses extracted from the corresponding header
49
+          values of each mail.
50
+
51
+          Patterns match addresses in case insensitive manner.
52
+
53
+          Multiple pattern strings can be specified in an array, in
54
+          which case a mail is selected if any of the patterns
55
+          matches. (i.e. patterns are OR'd)
56
+
57
+      - "mime_types"
58
+
59
+          Specify an array of MIME types to tell which non-attachment
60
+          part of a mail among its text/* parts should be used as mail
61
+          body.  The default value is `['text/plain', 'text/enriched',
62
+          'text/html']`.
63
+
64
+      - "has_attachment"
65
+
66
+          Setting this to true or false means only mails that do or do
67
+          not have an attachment are selected.
68
+
69
+          If this key is unspecified or set to null, it is ignored.
70
+
71
+      Set `mark_as_read` to true to mark found mails as read.
72
+    MD
73
+
74
+    event_description <<-MD
75
+      Events look like this:
76
+
77
+          {
78
+            "subject": "...",
79
+            "from": "nanashi.gombeh@example.jp",
80
+            "to": ["jane.doe@example.com"],
81
+            "cc": [],
82
+            "date": "2014-05-10T03:47:20+09:00",
83
+            "mime_type": "text/plain",
84
+            "body": "Hello,\n\n...",
85
+            "matches": {
86
+            }
87
+          }
88
+    MD
89
+
90
+    IDCACHE_SIZE = 100
91
+
92
# Health check for the agent: it is considered working when an event was
# created within the configured `expected_update_period_in_days` and no
# recent error logs exist.
def working?
  recently_active = event_created_within?(options['expected_update_period_in_days'])
  recently_active && !recent_error_logs?
end
95
+
96
# Returns the option hash a freshly created agent starts with.
def default_options
  connection = {
    'host' => 'imap.gmail.com',
    'ssl' => true,
    'username' => 'your.account',
    'password' => 'your.password'
  }

  # Merge order keeps the keys in the same order as the documented options.
  { 'expected_update_period_in_days' => "1" }
    .merge(connection)
    .merge('folders' => ['INBOX'], 'conditions' => {})
end
107
+
108
# Validates the agent options and records a message on `errors` for each
# problem found:
#
# - host/username/password must be strings
# - port and expected_update_period_in_days, when present, must pass
#   is_positive_integer?
# - ssl and mark_as_read, when present, must be booleans
# - mime_types and folders, when given, must be non-empty arrays of strings
#   (mime_types entries must start with "text/")
# - conditions, when given, must be a hash; subject/body must be valid
#   regexp strings, from/to/cc must be glob pattern strings (or arrays of
#   them) and has_attachment must be a boolean
def validate_options
  %w[host username password].each do |key|
    next if String === options[key]

    errors.add(:base, '%s is required and must be a string' % key)
  end

  port = options['port']
  if port.present? && !is_positive_integer?(port)
    errors.add(:base, "port must be a positive integer")
  end

  %w[ssl mark_as_read].each do |key|
    flag = options[key]
    next unless flag.present?
    next if [true, false].include?(flag)

    errors.add(:base, '%s must be a boolean value' % key)
  end

  types = options['mime_types']
  if Array === types
    text_only = types.all? { |type| String === type && type.start_with?('text/') }
    errors.add(:base, 'mime_types may only contain strings that match "text/*".') unless text_only
    errors.add(:base, 'mime_types should not be empty') if types.empty?
  elsif !types.nil?
    errors.add(:base, 'mime_types must be an array')
  end

  folders = options['folders']
  if Array === folders
    strings_only = folders.all? { |folder| String === folder }
    errors.add(:base, 'folders may only contain strings') unless strings_only
    errors.add(:base, 'folders should not be empty') if folders.empty?
  elsif !folders.nil?
    errors.add(:base, 'folders must be an array')
  end

  conditions = options['conditions']
  if Hash === conditions
    conditions.each do |key, value|
      # Blank values mean "condition not set" and are not validated.
      next unless value.present?

      if %w[subject body].include?(key)
        if String === value
          begin
            Regexp.new(value)
          rescue
            errors.add(:base, 'conditions.%s contains an invalid regexp' % key)
          end
        else
          errors.add(:base, 'conditions.%s contains a non-string object' % key)
        end
      elsif %w[from to cc].include?(key)
        Array(value).each do |pattern|
          unless String === pattern
            errors.add(:base, 'conditions.%s contains a non-string object' % key)
            next
          end

          begin
            # A dry run against an empty string only checks that the
            # pattern itself is accepted.
            glob_match?(pattern, '')
          rescue
            errors.add(:base, 'conditions.%s contains an invalid glob pattern' % key)
          end
        end
      elsif key == 'has_attachment'
        unless [true, false].include?(value)
          errors.add(:base, 'conditions.%s must be a boolean value or null' % key)
        end
      end
    end
  elsif !conditions.nil?
    errors.add(:base, 'conditions must be a hash')
  end

  period = options['expected_update_period_in_days']
  if period.present? && !is_positive_integer?(period)
    errors.add(:base, "Invalid expected_update_period_in_days format")
  end
end
200
+
201
# Scans the unread mails yielded by each_unread_mail and creates one event
# per mail that has not been handled yet and satisfies every configured
# condition.
#
# Two pieces of state are kept in `memory`:
#
# - 'seen': a hash of { uidvalidity => uids, ... } listing the unread mails
#   found in the watched folders during this run; a mail listed there by
#   the previous run is skipped.
# - 'notified': the message-ids of the IDCACHE_SIZE most recently notified
#   mails; a mail whose message-id is listed is skipped.
#
# When `mark_as_read` is set, each notified mail is flagged as read.
def check
  seen = memory['seen'] || {}
  new_seen = Hash.new { |hash, key|
    hash[key] = []
  }

  notified = memory['notified'] || []

  # validate_options accepts a missing or null `conditions`; treat that
  # as "no conditions" instead of failing on nil.
  conditions = options['conditions'] || {}

  each_unread_mail { |mail|
    # NOTE(review): keys are normalised to strings on the assumption that
    # `memory` is stored in a format with string-only hash keys, so that
    # lookups keep matching after a reload -- confirm against the Agent
    # base class.
    uidvalidity = mail.uidvalidity.to_s

    new_seen[uidvalidity] << mail.uid

    next if (uids = seen[uidvalidity]) && uids.include?(mail.uid)

    next if notified.include?(mail.message_id)

    body_parts = mail.body_parts(mime_types)
    matched_part = nil
    matches = {}

    conditions.all? { |key, value|
      case key
      when 'subject'
        value.present? or next true
        re = Regexp.new(value)
        if m = re.match(mail.subject)
          # Expose named captures to the event payload.
          m.names.each { |name|
            matches[name] = m[name]
          }
          true
        else
          false
        end
      when 'body'
        value.present? or next true
        re = Regexp.new(value)
        # The first candidate part matching the pattern becomes the
        # event body; nil (no part matched) fails the condition.
        matched_part = body_parts.find { |part|
          if m = re.match(part.decoded)
            m.names.each { |name|
              matches[name] = m[name]
            }
            true
          else
            false
          end
        }
      when 'from', 'to', 'cc'
        value.present? or next true
        mail.header[key].addresses.any? { |address|
          Array(value).any? { |pattern|
            glob_match?(pattern, address)
          }
        }
      when 'has_attachment'
        value == mail.has_attachment?
      else
        log 'Unknown condition key ignored: %s' % key
        true
      end
    } or next

    # Without a "body" condition, fall back to the highest priority part.
    matched_part ||= body_parts.first

    if matched_part
      mime_type = matched_part.mime_type
      body = matched_part.decoded
    else
      mime_type = 'text/plain'
      body = ''
    end

    create_event :payload => {
      'subject' => mail.subject,
      'from' => mail.from_addrs.first,
      'to' => mail.to_addrs,
      'cc' => mail.cc_addrs,
      'date' => (mail.date.iso8601 rescue nil),
      'mime_type' => mime_type,
      'body' => body,
      'matches' => matches,
      'has_attachment' => mail.has_attachment?,
    }

    if options['mark_as_read']
      log 'Marking as read'
      mail.mark_as_read
    end

    notified << mail.message_id if mail.message_id
  }

  # Keep only the most recent IDCACHE_SIZE message-ids.
  notified.slice!(0...-IDCACHE_SIZE) if notified.size > IDCACHE_SIZE

  memory['seen'] = new_seen
  memory['notified'] = notified
  save!
end
301
+
302
# Connects to the configured IMAP server, logs in, and yields every mail
# returned by an UNSEEN search in each configured folder.
#
# Option keys are read as strings, like everywhere else in this agent, and
# a missing `folders` option (which validate_options accepts) is treated
# as an empty list instead of raising on nil.
def each_unread_mail
  host, port, ssl, username = options.values_at('host', 'port', 'ssl', 'username')

  log "Connecting to #{host}#{':%d' % port if port}#{' via SSL' if ssl}"
  Client.open(host, port, ssl) { |imap|
    log "Logging in as #{username}"
    imap.login(username, options['password'])

    Array(options['folders']).each { |folder|
      log "Selecting the folder: %s" % folder

      imap.select(folder)

      unseen = imap.search('UNSEEN')

      if unseen.empty?
        log "No unread mails"
        next
      end

      imap.fetch_mails(unseen).each { |mail|
        yield mail
      }
    }
  }
ensure
  log 'Connection closed'
end
330
+
331
# Returns the configured `mime_types` option, or the default priority list
# of text MIME types when the option is not set.
def mime_types
  configured = options['mime_types']
  return configured if configured

  %w[text/plain text/enriched text/html]
end
334
+
335
+    private
336
+
337
# Tells whether `value` can be converted by Integer() to an integer that is
# strictly greater than zero.  Zero is rejected, in line with the
# "must be a positive integer" message reported for `port`.
#
# Returns false, rather than raising, for values Integer() cannot convert
# (nil, non-numeric strings, ...).
def is_positive_integer?(value)
  Integer(value) > 0
rescue StandardError
  false
end
342
+
343
# Matches `value` against the shell glob `pattern`, ignoring case and with
# brace expansion ({a,b}) enabled.
def glob_match?(pattern, value)
  flags = File::FNM_CASEFOLD | File::FNM_EXTGLOB
  File.fnmatch?(pattern, value, flags)
end
346
+
347
# Net::IMAP subclass that remembers the UIDVALIDITY of the selected folder
# and wraps fetched mails in Message objects.
class Client < ::Net::IMAP
  class << self
    # Creates a client, yields it to the block and disconnects afterwards.
    #
    # If creating the client raises, there is nothing to disconnect; the
    # nil guard lets the original error propagate instead of being masked
    # by a NoMethodError raised from the ensure clause.
    def open(host, port, ssl)
      imap = new(host, port, ssl)
      yield imap
    ensure
      imap.disconnect unless imap.nil?
    end
  end

  # Selects `folder` and records the last UIDVALIDITY response so that
  # messages fetched afterwards can be tagged with it.
  def select(folder)
    ret = super
    @uidvalidity = responses['UIDVALIDITY'].last
    ret
  end

  # Fetches UID and header of every message in `set` and returns them as
  # Message objects bound to this client.
  def fetch_mails(set)
    fetch(set, %w[UID RFC822.HEADER]).map { |data|
      Message.new(self, @uidvalidity, data)
    }
  end
end
369
+
370
# A mail found on the server.  It delegates to a Mail object parsed from
# the message header and fetches the body structure or the full message
# from the server on demand through the client it was created by.
class Message < SimpleDelegator
  DEFAULT_BODY_MIME_TYPES = %w[text/plain text/enriched text/html]

  attr_reader :uidvalidity, :uid

  # client      - the IMAP client used for follow-up fetches
  # uidvalidity - the UIDVALIDITY recorded when the folder was selected
  # fetch_data  - a fetch result whose `attr` holds 'UID' and
  #               'RFC822.HEADER'
  def initialize(client, uidvalidity, fetch_data)
    @client = client
    @uidvalidity = uidvalidity
    attr = fetch_data.attr
    @uid = attr['UID']
    super(Mail.read_from_string(attr['RFC822.HEADER']))
  end

  # Tells whether the BODYSTRUCTURE of the mail indicates an attachment.
  #
  # The answer is cached whatever its value.  A plain `||=` would not
  # cache a negative answer and would ask the server again on every call.
  def has_attachment?
    return @has_attachment if defined?(@has_attachment)

    data = @client.uid_fetch(@uid, 'BODYSTRUCTURE').first
    @has_attachment = struct_has_attachment?(data.attr['BODYSTRUCTURE'])
  end

  # Fetches the whole message with BODY.PEEK[] and returns it parsed; the
  # result is cached.
  def fetch
    @parsed ||=
      begin
        data = @client.uid_fetch(@uid, 'BODY.PEEK[]').first
        Mail.read_from_string(data.attr['BODY[]'])
      end
  end

  # Returns the non-attachment text parts whose MIME type is listed in
  # `mime_types`.  For a multipart mail the parts are first sorted by the
  # order given in `mime_types`.
  def body_parts(mime_types = DEFAULT_BODY_MIME_TYPES)
    mail = fetch
    if mail.multipart?
      mail.body.set_sort_order(mime_types)
      mail.body.sort_parts!
      mail.all_parts
    else
      [mail]
    end.reject { |part|
      part.multipart? || part.attachment? || !part.text? ||
        !mime_types.include?(part.mime_type)
    }
  end

  # Adds the Seen flag to the mail on the server.
  def mark_as_read
    @client.uid_store(@uid, '+FLAGS', [:Seen])
  end

  private

  # A structure counts as having an attachment when it is multipart and
  # either its subtype is MIXED or any of its parts recursively does.
  def struct_has_attachment?(struct)
    struct.multipart? && (
      struct.subtype == 'MIXED' ||
      struct.parts.any? { |part|
        struct_has_attachment?(part)
      }
    )
  end
end
428
+  end
429
+end

+ 22 - 0
spec/data_fixtures/imap1.eml

@@ -0,0 +1,22 @@
1
+From: Nanashi <nanashi.gombeh@example.jp>
2
+Date: Fri, 9 May 2014 16:00:00 +0900
3
+Message-ID: <foo.123@mail.example.jp>
4
+Subject: some subject
5
+To: Jane <jane.doe@example.com>, John <john.doe@example.com>
6
+MIME-Version: 1.0
7
+Content-Type: multipart/alternative; boundary=d8c92622e09101e4bc833685557b
8
+
9
+--d8c92622e09101e4bc833685557b
10
+Content-Type: text/plain; charset=UTF-8
11
+
12
+Some plain text
13
+Some second line
14
+
15
+--d8c92622e09101e4bc833685557b
16
+Content-Type: text/html; charset=UTF-8
17
+Content-Transfer-Encoding: quoted-printable
18
+
19
+<div dir=3D"ltr">Some HTML document<br>
20
+Some second line of HTML<br></div>
21
+
22
+--d8c92622e09101e4bc833685557b--

+ 20 - 0
spec/data_fixtures/imap2.eml

@@ -0,0 +1,20 @@
1
+From: John <john.doe@example.com>
2
+Date: Fri, 9 May 2014 17:00:00 +0900
3
+Message-ID: <bar.456@mail.example.com>
4
+Subject: Re: some subject
5
+To: Jane <jane.doe@example.com>, Nanashi <nanashi.gombeh@example.jp>
6
+MIME-Version: 1.0
7
+Content-Type: multipart/alternative; boundary=d8c92622e09101e4bc833685557b
8
+
9
+--d8c92622e09101e4bc833685557b
10
+Content-Type: text/plain; charset=UTF-8
11
+
12
+Some reply
13
+
14
+--d8c92622e09101e4bc833685557b
15
+Content-Type: text/html; charset=UTF-8
16
+Content-Transfer-Encoding: quoted-printable
17
+
18
+<div dir=3D"ltr">Some HTML reply<br></div>
19
+
20
+--d8c92622e09101e4bc833685557b--

+ 195 - 0
spec/models/agents/imap_folder_agent_spec.rb

@@ -0,0 +1,195 @@
1
+require 'spec_helper'
2
+require 'time'
3
+
4
+describe Agents::ImapFolderAgent do
5
+  describe 'checking IMAP' do
6
+    before do
7
+      @site = {
8
+        'expected_update_period_in_days' => 1,
9
+        'host' => 'mail.example.net',
10
+        'ssl' => true,
11
+        'username' => 'foo',
12
+        'password' => 'bar',
13
+        'folders' => ['INBOX'],
14
+        'conditions' => {
15
+        }
16
+      }
17
+      @checker = Agents::ImapFolderAgent.new(:name => 'Example', :options => @site, :keep_events_for => 2)
18
+      @checker.user = users(:bob)
19
+      @checker.save!
20
+
21
+      message_mixin = Module.new {
22
+        def uidvalidity
23
+          '100'
24
+        end
25
+
26
+        def has_attachment?
27
+          false
28
+        end
29
+
30
# Test double for Message#body_parts: for each requested MIME type, in
# order, picks the first part of that type and drops types with no part.
#
# The default must be a %w[] word array; a bare %[] literal is a single
# String, which does not respond to #map.
def body_parts(mime_types = %w[text/plain text/enriched text/html])
  mime_types.map { |type|
    all_parts.find { |part|
      part.mime_type == type
    }
  }.compact
end
36
+        end
37
+      }
38
+
39
+      @mails = [
40
+        Mail.read(Rails.root.join('spec/data_fixtures/imap1.eml')).tap { |mail|
41
+          mail.extend(message_mixin)
42
+          stub(mail).uid.returns(1)
43
+        },
44
+        Mail.read(Rails.root.join('spec/data_fixtures/imap2.eml')).tap { |mail|
45
+          mail.extend(message_mixin)
46
+          stub(mail).uid.returns(2)
47
+          stub(mail).has_attachment?.returns(true)
48
+        },
49
+      ]
50
+
51
+      stub(@checker).each_unread_mail.returns { |yielder|
52
+        @mails.each(&yielder)
53
+      }
54
+    end
55
+
56
+    describe 'validations' do
57
+      before do
58
+        @checker.should be_valid
59
+      end
60
+
61
+      it 'should validate the integer fields' do
62
+        @checker.options['expected_update_period_in_days'] = '2'
63
+        @checker.should be_valid
64
+
65
+        @checker.options['expected_update_period_in_days'] = 'nonsense'
66
+        @checker.should_not be_valid
67
+      end
68
+
69
+      it 'should validate the boolean fields' do
70
+        @checker.options['ssl'] = false
71
+        @checker.should be_valid
72
+
73
+        @checker.options['ssl'] = 'true'
74
+        @checker.should_not be_valid
75
+      end
76
+    end
77
+
78
+    describe '#check' do
79
+      it 'should check for mails and save memory' do
80
+        lambda { @checker.check }.should change { Event.count }.by(2)
81
+        @checker.memory['notified'].sort.should == @mails.map(&:message_id).sort
82
+        @checker.memory['seen'].should == @mails.each_with_object({}) { |mail, seen|
83
+          (seen[mail.uidvalidity] ||= []) << mail.uid
84
+        }
85
+
86
+        events = Event.last(2)
87
+        events.first.payload.should == {
88
+          'from' => 'nanashi.gombeh@example.jp',
89
+          'to' => ['jane.doe@example.com', 'john.doe@example.com'],
90
+          'cc' => [],
91
+          'date' => '2014-05-09T16:00:00+09:00',
92
+          'subject' => 'some subject',
93
+          'body' => "Some plain text\nSome second line\n",
94
+          'has_attachment' => false,
95
+          'matches' => {},
96
+          'mime_type' => 'text/plain',
97
+        }
98
+        events.last.payload.should == {
99
+          'from' => 'john.doe@example.com',
100
+          'to' => ['jane.doe@example.com', 'nanashi.gombeh@example.jp'],
101
+          'cc' => [],
102
+          'subject' => 'Re: some subject',
103
+          'body' => "Some reply\n",
104
+          'date' => '2014-05-09T17:00:00+09:00',
105
+          'has_attachment' => true,
106
+          'matches' => {},
107
+          'mime_type' => 'text/plain',
108
+        }
109
+
110
+        lambda { @checker.check }.should_not change { Event.count }
111
+      end
112
+
113
+      it 'should narrow mails by To' do
114
+        @checker.options['conditions']['to'] = 'John.Doe@*'
115
+
116
+        lambda { @checker.check }.should change { Event.count }.by(1)
117
+        @checker.memory['notified'].sort.should == [@mails.first.message_id]
118
+        @checker.memory['seen'].should == @mails.each_with_object({}) { |mail, seen|
119
+          (seen[mail.uidvalidity] ||= []) << mail.uid
120
+        }
121
+
122
+        Event.last.payload.should == {
123
+          'from' => 'nanashi.gombeh@example.jp',
124
+          'to' => ['jane.doe@example.com', 'john.doe@example.com'],
125
+          'cc' => [],
126
+          'date' => '2014-05-09T16:00:00+09:00',
127
+          'subject' => 'some subject',
128
+          'body' => "Some plain text\nSome second line\n",
129
+          'has_attachment' => false,
130
+          'matches' => {},
131
+          'mime_type' => 'text/plain',
132
+        }
133
+
134
+        lambda { @checker.check }.should_not change { Event.count }
135
+      end
136
+
137
+      it 'should perform regexp matching and save named captures' do
138
+        @checker.options['conditions'].update(
139
+          'subject' => '\ARe: (?<a>.+)',
140
+          'body'    => 'Some (?<b>.+) reply',
141
+        )
142
+
143
+        lambda { @checker.check }.should change { Event.count }.by(1)
144
+        @checker.memory['notified'].sort.should == [@mails.last.message_id]
145
+        @checker.memory['seen'].should == @mails.each_with_object({}) { |mail, seen|
146
+          (seen[mail.uidvalidity] ||= []) << mail.uid
147
+        }
148
+
149
+        Event.last.payload.should == {
150
+          'from' => 'john.doe@example.com',
151
+          'to' => ['jane.doe@example.com', 'nanashi.gombeh@example.jp'],
152
+          'cc' => [],
153
+          'subject' => 'Re: some subject',
154
+          'body' => "<div dir=\"ltr\">Some HTML reply<br></div>\n",
155
+          'date' => '2014-05-09T17:00:00+09:00',
156
+          'has_attachment' => true,
157
+          'matches' => { 'a' => 'some subject', 'b' => 'HTML' },
158
+          'mime_type' => 'text/html',
159
+        }
160
+
161
+        lambda { @checker.check }.should_not change { Event.count }
162
+      end
163
+
164
+      it 'should narrow mails by has_attachment (true)' do
165
+        @checker.options['conditions']['has_attachment'] = true
166
+
167
+        lambda { @checker.check }.should change { Event.count }.by(1)
168
+
169
+        Event.last.payload['subject'].should == 'Re: some subject'
170
+      end
171
+
172
+      it 'should narrow mails by has_attachment (false)' do
173
+        @checker.options['conditions']['has_attachment'] = false
174
+
175
+        lambda { @checker.check }.should change { Event.count }.by(1)
176
+
177
+        Event.last.payload['subject'].should == 'some subject'
178
+      end
179
+
180
+      it 'should narrow mail parts by MIME types' do
181
+        @checker.options['mime_types'] = %w[text/plain]
182
+        @checker.options['conditions'].update(
183
+          'subject' => '\ARe: (?<a>.+)',
184
+          'body'    => 'Some (?<b>.+) reply',
185
+        )
186
+
187
+        lambda { @checker.check }.should_not change { Event.count }
188
+        @checker.memory['notified'].sort.should == []
189
+        @checker.memory['seen'].should == @mails.each_with_object({}) { |mail, seen|
190
+          (seen[mail.uidvalidity] ||= []) << mail.uid
191
+        }
192
+      end
193
+    end
194
+  end
195
+end